{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Excel File Manipulation with pandas"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Using pandas with Excel Files"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Case Study: Excel Reporting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_excel(\"sales_data/new/January.xlsx\")\n",
    "df.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Reading Excel Files with pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_excel(\"xl/stores.xlsx\",\n",
    "                   sheet_name=\"2019\", skiprows=1, usecols=\"B:F\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def fix_missing(x):\n",
    "    return False if x in [\"\", \"MISSING\"] else x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_excel(\"xl/stores.xlsx\",\n",
    "                   sheet_name=\"2019\", skiprows=1, usecols=\"B:F\",\n",
    "                   converters={\"Flagship\": fix_missing})\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The Flagship column now has Dtype \"bool\"\n",
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sheets = pd.read_excel(\"xl/stores.xlsx\", sheet_name=[\"2019\", \"2020\"],\n",
    "                       skiprows=1, usecols=[\"Store\", \"Employees\"])\n",
    "sheets[\"2019\"].head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_excel(\"xl/stores.xlsx\", sheet_name=0,\n",
    "                   skiprows=2, skipfooter=3,\n",
    "                   usecols=\"B:C,F\", header=None,\n",
    "                   names=[\"Branch\", \"Employee_Count\", \"Is_Flagship\"])\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_excel(\"xl/stores.xlsx\", sheet_name=\"2019\",\n",
    "                   skiprows=1, usecols=\"B,C,F\", skipfooter=2,\n",
    "                   na_values=\"MISSING\", keep_default_na=False)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "f = open(\"output.txt\", \"w\")\n",
    "f.write(\"Some text\")\n",
    "f.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Context Managers and the with Statement"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(\"output.txt\", \"w\") as f:\n",
    "    f.write(\"Some text\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with pd.ExcelFile(\"xl/stores.xls\") as f:\n",
    "    df1 = pd.read_excel(f, \"2019\", skiprows=1, usecols=\"B:F\", nrows=2)\n",
    "    df2 = pd.read_excel(f, \"2020\", skiprows=1, usecols=\"B:F\", nrows=2)\n",
    "\n",
    "df1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "stores = pd.ExcelFile(\"xl/stores.xlsx\")\n",
    "stores.sheet_names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "url = (\"https://raw.githubusercontent.com/fzumstein/\"\n",
    "       \"python-for-excel/1st-edition/xl/stores.xlsx\")\n",
    "pd.read_excel(url, skiprows=1, usecols=\"B:E\", nrows=2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Writing Excel Files with pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import datetime as dt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = [[dt.datetime(2020,1,1, 10, 13), 2.222, 1, True],\n",
    "        [dt.datetime(2020,1,2), np.nan, 2, False],\n",
    "        [dt.datetime(2020,1,2), np.inf, 3, True]]\n",
    "df = pd.DataFrame(data=data,\n",
    "                  columns=[\"Dates\", \"Floats\", \"Integers\", \"Booleans\"])\n",
    "df.index.name=\"index\"\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.to_excel(\"written_with_pandas.xlsx\", sheet_name=\"Output\",\n",
    "            startrow=1, startcol=1, index=True, header=True,\n",
    "            na_rep=\"<NA>\", inf_rep=\"<INF>\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with pd.ExcelWriter(\"written_with_pandas2.xlsx\") as writer:\n",
    "    df.to_excel(writer, sheet_name=\"Sheet1\", startrow=1, startcol=1)\n",
    "    df.to_excel(writer, sheet_name=\"Sheet1\", startrow=10, startcol=1)\n",
    "    df.to_excel(writer, sheet_name=\"Sheet2\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
